# -*- coding: utf-8 -*-
# @Time : 2021/3/15 23:04
# @Author : Leon
# @File : spider.py
# @Software: PyCharm

import bs4       #网页解析，获取数据
import re        #正则表达式，进行文字匹配
import urllib.request,urllib.error       #制定url，获取网页数据
import xlwt    #进行excel操作
import sqlite3   #进行sqlite数据库操作
def main():
    """Script entry point: crawl the Douban Top 250 listing pages.

    The crawl result is only bound to a local name; nothing is saved or
    returned by this function.
    """
    top250_url = "https://movie.douban.com/top250?start="
    movies = getData(top250_url)

# Crawl the web pages
def getData(baseurl):
    """Request the ten listing pages that start at offsets 0, 25, ..., 225.

    Each page URL is built by appending the offset to *baseurl* and is
    fetched with ``askURL``. The fetched HTML is not parsed, so the
    returned list is always empty.

    Args:
        baseurl: URL prefix to which the numeric start offset is appended.

    Returns:
        An empty list.
    """
    records = []
    for offset in range(0, 250, 25):
        askURL(baseurl + str(offset))
    return records


def askURL(url):
    """Fetch *url* and return the response body decoded as UTF-8.

    A browser-like User-Agent header is sent with the request, and the
    fetched body is also printed to standard output.

    Args:
        url: The address to request.

    Returns:
        The decoded response body, or an empty string when the request
        fails with ``urllib.error.URLError``; in that case the error's
        ``code`` and/or ``reason`` are printed instead.
    """
    # Product tokens are written as "name/version" with no spaces around
    # the slash, matching what a real Chrome browser sends.
    head = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/89.0.4389.82 Safari/537.36"
        )
    }
    request = urllib.request.Request(url, headers=head)
    html = ""
    try:
        # The context manager closes the connection even when reading or
        # decoding raises, so the response is never leaked.
        with urllib.request.urlopen(request) as response:
            html = response.read().decode("utf-8")
        print(html)
    except urllib.error.URLError as e:
        # HTTPError carries both attributes; a plain URLError only `reason`.
        if hasattr(e, "code"):
            print(e.code)
        if hasattr(e, "reason"):
            print(e.reason)
    return html
# Save the data
def saveData(savePath):
    """Placeholder for saving the scraped data.

    Currently only prints a fixed marker string; *savePath* is not used.

    Args:
        savePath: Intended output location (ignored for now).
    """
    print("djjdjd")
# Run the crawler only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
